{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "09fa0727-544d-42b8-96bc-2e63a55cb530",
   "metadata": {},
   "source": [
    "# Debugging with Evidently Test Suites and Reports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23c1c23f31d03fc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import pandas as pd\n",
    "\n",
    "from evidently import DataDefinition\n",
    "from evidently import Dataset\n",
    "from evidently import Report\n",
    "from evidently.presets import DataDriftPreset\n",
    "\n",
    "from joblib import dump, load\n",
    "\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44b49d335185015b",
   "metadata": {},
   "source": [
    "## Load data and model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8fe4f358cf24845",
   "metadata": {},
   "outputs": [],
   "source": [
    "ref_data = pd.read_parquet('data/reference.parquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f81b3ba799db6c09",
   "metadata": {},
   "outputs": [],
   "source": [
    "current_data = pd.read_parquet('data/green_tripdata_2022-02.parquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "797e28e0f316de1c",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('models/lin_reg.bin', 'rb') as f_in:\n",
    "    model = load(f_in)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3f9f40226f5412d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# data labeling\n",
    "target = \"duration_min\"\n",
    "num_features = [\"passenger_count\", \"trip_distance\", \"fare_amount\", \"total_amount\"]\n",
    "cat_features = [\"PULocationID\", \"DOLocationID\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "771ff16eee8d4e40",
   "metadata": {},
   "outputs": [],
   "source": [
    "problematic_data = current_data.loc[(current_data.lpep_pickup_datetime >= datetime.datetime(2022,2,2,0,0)) & \n",
    "                               (current_data.lpep_pickup_datetime < datetime.datetime(2022,2,3,0,0))]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5bcdf6367f12095a",
   "metadata": {},
   "source": [
    "## Generate Report with Tests (ex Test Suite)\n",
    "\n",
    "### Note\n",
    "\n",
    "Since Evidently 0.7.0 release, Test Suite is integrated into reports.\n",
    "\n",
    "To include tests into report, you just past tests to metric or provide `include_tests=True` to include default tests (if provided).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c69323606ad94423",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_definition = DataDefinition(\n",
    "    numerical_columns=num_features + ['prediction'],\n",
    "    categorical_columns=cat_features,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eba31f2094f3501d",
   "metadata": {},
   "outputs": [],
   "source": [
    "problematic_data['prediction'] = model.predict(problematic_data[num_features + cat_features].fillna(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ccfe2916aa1df406",
   "metadata": {},
   "outputs": [],
   "source": [
    "report = Report([DataDriftPreset()], include_tests=True)\n",
    "\n",
    "ref_dataset = Dataset.from_pandas(ref_data, data_definition=data_definition)\n",
    "problematic_dataset = Dataset.from_pandas(ref_data, data_definition=data_definition)\n",
    "\n",
    "\n",
    "run = report.run(reference_data=ref_dataset, current_data=problematic_dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65ef103f31a14e47",
   "metadata": {},
   "outputs": [],
   "source": [
    "run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1899d6782d6fc0be",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
